/* whd: loongson3C_ddr3_leveling.S
	2012.9.1
	first written by Leping from pfunc.s
	Uses t8 to pass the CONFIG address
	ECC slice is not included yet
	2012.9.25 added ECC slice
*/
	b    lvl_req_set0
/* t1 (0x20, 0x40, ...) and t2 (0x180, 0x181, ...) are used for loops; t0 is the loop count */
/* a0, a1 are used for load and store */
/* a2, a3 are used to set some parameters / judge some edges */
/* ta0 is the temporary variable that is always used */

/* in PRINTSTR: a0, a1, a2, v0, v1 will be changed */
/* in GET_NUMBER_OF_SLICES: t0, t1 will be changed and t0 is the output */
/* in RDOE_SUB_TRDDATA_ADD: a0, a1, ta0 will be changed */
/* in hexserial: ra, a0, a1, a2, a3 will be changed */

#include "ddr_leveling_define.h"
//#define	PRINT_PREAMBLE_CHECK
#if defined(CONFIG_DDR_32BIT) || defined(CONFIG_DDR_16BIT)
#define NO_EDGE_CHECK
#else
#define PREAMBLE_CHECK_DEBUG
//#define PRINT_DDR_LEVELING
#endif
//#define SIGNAL_DEPICT_DEBUG
//#define LVL_DEBUG
#define CHANGE_DQ_WITH_DQS

#define ORDER_OF_UDIMM	0x876543210
#define ORDER_OF_RDIMM	0x765401238
#define WRDQS_LTHF_STD 	0x40
#define WRDQ_LTHF_STD 	0x40 //less then STD1 and less then STD2 will be set1
#define	RDDQS_LTHF_STD1	0x40 //greater then STD1 and less then STD2 will be set1
#define	RDDQS_LTHF_STD2	0x0
#define DLL_WRDQ_SUB	0x20
#define DLL_GATE_SUB	0x20
#define	WR_FILTER_LENGTH 	0x6
#define GATE_FILTER_LENGTH	0x6
#define PREAMBLE_LENGTH_3A9 0x60
#define PREAMBLE_LENGTH_3A8 0x60
#define MDL_CNT 0x500
#define GCL_CNT 9

#define	OFFSET_DLL_WRDQ 	0x19  // from 0x20/40/....
#define OFFSET_DLL_WRDQS 	0x1a
#define OFFSET_DLL_GATE 	0x18
#define OFFSET_WRDQ_LTHF 	0x0
#define	OFFSET_WRDQS_LTHF 	0x1
#define OFFSET_RDDQS_LTHF 	0x2
#define OFFSET_RDOE_BEGIN	0xe
#define OFFSET_RDOE_END	0xf
#define OFFSET_ODTOE_BEGIN 	0x14
#define OFFSET_ODTOE_END	0x15

	.global ddr3_leveling
	.ent    ddr3_leveling
ddr3_leveling:

	move s5,ra

//#define PM_DPD_FRE// change parameters depend on frequency
#ifdef PM_DPD_FRE

#for 3a8, different frequency will use different rd_oe_start/stop
#frequency 500M, rd_oe_begin/end 0x03030202
#frequency 600M, rd_oe_begin/end 0x03030000
	li      t1, 0xbfe001c0
	lw      a1, 0x0(t1)
	dsrl    t1, a1, 14 //DDR_LOOPC
	and     t1, t1, 0x3ff
	dsrl    a1, a1, 24 //DDR_DIV
	and     a1, a1, 0x3f

	//DDR_DIV: 4 or 8
	dli     ta0, 0x4
	beq     a1, ta0, 1f
	nop
	dsrl    t1, t1, 1
1:
	dsrl    t1, t1, 2

	dli     ta0, 15
	bgt     t1, ta0, 3f
	nop

	//<= 500M, for udimm, add rd_oe_start/stop by 0x2 and sub tPHY_RDDATA by 0x1
	//         for rdimm, only sub tPHY_RDDATA by 0x1
	GET_DIMM_TYPE
	bnez    a1, 4f //RDIMM
	nop

	//temp code for Kinston 2G UDIMM, at 400MHz, only sub tPHY_RDDATA by 0x1
	dli     ta0, 12
	beq     t1, ta0, 4f
	nop


/* identify wheather there is ecc slice */
	li      t0, 0x8
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, 1f //when rd_after_write enabled, the 9th device may don't need leveling
	nop
	daddu   t0, t0, 0x1

1:

	dli     t1, 0x28
	or      t1, t1, t8

2:
	ld      a0, 0x0(t1)
	dli     ta0, 0x020200000000
	daddu   a0, a0, ta0
	sd      a0, 0x0(t1)
	daddu   t1, t1, 0x20
	dsubu   t0, t0, 0x1
	bnez    t0, 2b
	nop

4: //FOR RDIMM
	ld      a0, 0x1c0(t8)
	dsubu   a0, a0, 0x1
	sd      a0, 0x1c0(t8)

	//> 500M
3:


#endif
#only the gate dll is bypassed at the beginning of leveling
#while other dlls' bypass is set at the end of leveling
#ifdef DDR_DLL_BYPASS
	dli     t1, 0x0
	or      t1, t1, t8
	ld      a1, 0x0(t1)
	dli     ta0, 0x0000ffff00000000
	and     a1, a1, ta0
	dsrl    a1, a1, 32 // dll_value store in a1
//	daddu   a1, a1, 2

	//set dll_ck0
	dli     t1, 0x18
	or      t1, t1, t8
	lb      a0, 0x4(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x4(t1)

	//set dll_ck1
	dli     t1, 0x18
	or      t1, t1, t8
	lb      a0, 0x5(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x5(t1)

	//set dll_ck2
	dli     t1, 0x18
	or      t1, t1, t8
	lb      a0, 0x6(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x6(t1)

	//set dll_ck3
	dli     t1, 0x18
	or      t1, t1, t8
	lb      a0, 0x7(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x7(t1)

#endif
/* 1. wait until init done */
/*
 * Busy-wait on the doubleword at (t8 | 0x160) until bits 31:24 become
 * non-zero.  t8 holds the CONFIG base address (see file header).
 * There is no timeout: the loop spins until the field is set.
 * Clobbers: t1, a0, ta0.
 */
	// t1 = address of the status doubleword
	dli     t1, 0x160
	or      t1, t1, t8
wait_dram_init_done:
	ld      a0, 0x0(t1)
	// keep only bits 31:24 of the status word
	dli     ta0, 0x00000000ff000000
	and     a0, a0, ta0
	beqz    a0, wait_dram_init_done
	nop

/*
 * Write leveling setup (steps 2-4).
 *   - clears the dll_wrdqs byte of every slice,
 *   - writes 1 to the leveling mode byte at 0x180(t8),
 *   - polls the byte at 0x185(t8) until it is non-zero.
 * PRINTSTR clobbers a0, a1, a2, v0, v1 (see file header), so nothing is
 * kept in those registers across the prints.
 */
write_leveling:
	PRINTSTR("\r\nwrite leveling begin\r\n")

/* 2. set all dll to be 0 */
	// t0 = number of slices (output of the macro, which also changes t1)
	GET_NUMBER_OF_SLICES
	// t1 starts at the CONFIG base and is advanced by 0x20 BEFORE each
	// store, so the first slice written is the one at t8 + 0x20
	dli     t1, 0x0
	or      t1, t1, t8
dll_wrdqs_set0:
	daddu   t1, t1, 0x20
	li	a0, 0x0
	sb      a0, OFFSET_DLL_WRDQS(t1)
	subu    t0, t0, 0x1
	bnez    t0, dll_wrdqs_set0
	nop

	PRINTSTR("\r\nall dll_wrdqs set 0\r\n")

/* 3. set leveling mode to be WRITE LEVELING */
lvl_mode_set01:
	// mode byte = 1 selects write leveling (gate leveling later writes 2)
	dli	a0, 0x1
	sb	a0, 0x180(t8)

	PRINTSTR("\r\nset leveling mode to be WRITE LEVELING\r\n")

/* 4. check whether to start leveling */
lvl_ready_sampling:
	// spin until the ready byte at 0x185 becomes non-zero (no timeout)
	lb      a0, 0x185(t8)
	beqz    a0, lvl_ready_sampling
	nop

	PRINTSTR("\r\nwrite leveling ready\r\n")

/* 5. Set leveling req */
/*
 * Write leveling, first pass: for every slice, step dll_wrdqs until the
 * leveling response bit reads 0.
 *
 * Register roles inside the loop:
 *   t0  = slices still to process (from GET_NUMBER_OF_SLICES)
 *   t1  = register base of the current slice (t8 | 0x20, +0x20 per slice)
 *   t2  = response pointer; the response byte is read at 0x7(t2) and t2
 *         advances by one byte per slice
 *   ta2 = number of retries spent on the current slice; more than MDL_CNT
 *         retries branches to leveling_failed (defined outside this chunk)
 */

	GET_NUMBER_OF_SLICES
	dli	t1, 0x20
	or	t1, t1, t8
	dli	t2, 0x180
	or	t2, t2, t8

	// retry counter is cleared once here and again on every slice advance
	dli             ta2, 0x0
lvl_req_set0:
	// pulse the leveling request byte: write 1 then 0
	dli	a0, 0x1
	sb	a0, 0x181(t8)
	dli	a0, 0x0
	sb	a0, 0x181(t8)

#ifdef LVL_DEBUG
	PRINTSTR("\r\nwrite leveling req set0\r\n")
#endif

/* 6. check whether this leveling request done */
lvl_done_sampling0:
	// spin until the done byte at 0x186 is non-zero
	lb	a0, 0x186(t8)
	beqz	a0, lvl_done_sampling0
	nop

#ifdef LVL_DEBUG
	PRINTSTR("\r\nwrite leveling done\r\n")
#endif

lvl_resp_set0:
	// bit 0 of the response byte of the current slice; 0 ends the search
	lb	a0, 0x7(t2)
	dli	ta0, 0x1
	and 	a0, a0, ta0
	beqz	a0, resp_set0_done
	nop

dll_wrdqs_add0:
#ifdef LVL_DEBUG
	PRINTSTR("\r\nslice ")
	dli	a0, 0x8
	dsubu	a0, a0, t0
//	bal	hexserial4
	nop
	PRINTSTR(" add to get 0\r\n")
#endif
	// dll_wrdqs = (dll_wrdqs + 1) & 0x7f
	lb	a0, OFFSET_DLL_WRDQS(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQS(t1)

#ifdef CHANGE_DQ_WITH_DQS
	// keep the dependent fields in step with the new dll_wrdqs:
	//   wrdqs_lt_half = (dll_wrdqs < WRDQS_LTHF_STD)
	lb	a0, OFFSET_DLL_WRDQS(t1) // get dll_wrdqs
	blt	a0, WRDQS_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
2:
	//   dll_wrdq = (dll_wrdqs - 0x20) & 0x7f
	dsubu	a0, a0, 0x20
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQ(t1) // set dll_wrdata

	//   wrdq_lt_half = (dll_wrdq < WRDQ_LTHF_STD)
	blt	a0, WRDQ_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
2:
#endif
	// give up once the retry budget for this slice is exhausted
	bgt     ta2, MDL_CNT, leveling_failed
	nop
	daddu   ta2, ta2, 0x1
	b 	lvl_req_set0
	nop

resp_set0_done:
#ifdef	LVL_DEBUG
	PRINTSTR("\r\n 0 is found\r\n")
#endif
	// advance to the next slice and restart its retry counter
	dsubu	t0, t0, 0x1
	daddu	t1, t1, 0x20
	daddu	t2, t2, 0x1
	dli     ta2, 0x0
	bnez	t0, lvl_req_set0
	nop

/* 0 to 1 */
/*
 * Write leveling, second pass: for every slice, keep stepping dll_wrdqs
 * until the response bit has read 1 on WR_FILTER_LENGTH consecutive
 * requests, then move dll_wrdqs back by (WR_FILTER_LENGTH - 1) steps.
 *
 * Register roles:
 *   t0  = slices still to process
 *   t1  = register base of the current slice (t8 | 0x20, +0x20 per slice)
 *   t2  = response pointer (byte at 0x7(t2), +1 per slice)
 *   s7  = remaining consecutive 1 responses required; reloaded with
 *         WR_FILTER_LENGTH whenever a 0 response is seen
 *   ta2 = retry counter, expected to be 0 on entry (it is cleared when the
 *         previous pass advances a slice); more than MDL_CNT retries
 *         branches to leveling_failed
 */
	GET_NUMBER_OF_SLICES
	dli	t1, 0x20
	or	t1, t1, t8
	dli	t2, 0x180
	or	t2, t2, t8
	dli	s7, WR_FILTER_LENGTH
lvl_req_set1:
	// pulse the leveling request byte: write 1 then 0
	dli	a0, 0x1
	sb	a0, 0x181(t8)
	dli	a0, 0x0
	sb	a0, 0x181(t8)

#ifdef LVL_DEBUG
	PRINTSTR("\r\nwrite leveling req set1\r\n")
#endif

lvl_done_sampling1:
	// spin until the done byte at 0x186 is non-zero
	lb	a0, 0x186(t8)
	beqz	a0, lvl_done_sampling1
	nop

#ifdef LVL_DEBUG
	PRINTSTR("\r\nwrite leveling done\r\n")
#endif

lvl_resp_set1:
	// bit 0 of the response byte; a 1 counts towards the filter
	lb	a0, 0x7(t2)
	dli	ta0, 0x1
	and 	a0, a0, ta0
	bnez	a0, resp_set1_done
	nop

	// response was 0: the run of 1s is broken, restart the filter
	dli	s7, WR_FILTER_LENGTH
dll_wrdqs_add1:
	// dll_wrdqs = (dll_wrdqs + 1) & 0x7f
	lb	a0, OFFSET_DLL_WRDQS(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQS(t1)

#ifdef CHANGE_DQ_WITH_DQS
	// wrdqs_lt_half = (dll_wrdqs < WRDQS_LTHF_STD)
	lb	a0, OFFSET_DLL_WRDQS(t1) // get dll_wrdqs
	blt	a0, WRDQS_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
2:
	// dll_wrdq = (dll_wrdqs - 0x20) & 0x7f
	dsubu	a0, a0, 0x20
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQ(t1) // set dll_wrdata

	// wrdq_lt_half = (dll_wrdq < WRDQ_LTHF_STD)
	blt	a0, WRDQ_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
2:
#endif
	// give up once the retry budget for this slice is exhausted
	bgt     ta2, MDL_CNT, leveling_failed
	nop
	daddu   ta2, ta2, 0x1
	b 	lvl_req_set1
	nop

resp_set1_done:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n 1 is found @ slice")
	dli	a0, 0x8
	dsubu	a0, a0, t0
	bal 	hexserial
	nop
#endif
	// one more 1 seen; keep stepping until the filter count reaches zero
	dsubu	s7, s7, 0x1
	bnez	s7, dll_wrdqs_add1
	nop
	dli	s7, WR_FILTER_LENGTH

//  undo the extra steps taken while filtering:
//  dll_wrdqs = (dll_wrdqs - WR_FILTER_LENGTH + 1) & 0x7f
	lb	a0, OFFSET_DLL_WRDQS(t1)
	dsubu	a0, a0, WR_FILTER_LENGTH
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQS(t1)

#ifdef CHANGE_DQ_WITH_DQS
	// recompute the dependent fields for the final dll_wrdqs value
	lb	a0, OFFSET_DLL_WRDQS(t1) // get dll_wrdqs
	blt	a0, WRDQS_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
2:
	dsubu	a0, a0, 0x20
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQ(t1) // set dll_wrdata

	blt	a0, WRDQ_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
2:
#endif
	// advance to the next slice and restart its retry counter
	dsubu	t0, t0, 0x1
	daddu	t1, t1, 0x20
	daddu	t2, t2, 0x1
	dli	    ta2, 0x0
	bnez	t0, lvl_req_set1
	nop

write_leveling_done:
#ifdef PRINT_DDR_LEVELING
	PRINTSTR("\r\n The MC param after write leveling 0 to 1 is:\r\n")
	PRINT_THE_MC_PARAM
#endif

/* 8. All 1 found, set params according to wrdqs */

//	GET_DIMM_TYPE
//	beqz    a1, 81f
//	nop

/* adjust wrdqs carefully */
#if 0   //def  DEBUG_DDR_PARAM   //print registers
	PRINTSTR("\r\nThe MC param before carefully adjust is:\r\n")
	PRINT_THE_MC_PARAM
#endif
/*
 * Runs WRDQS_ADJUST_LOOP four times with different (a2, a3) pairs:
 *   (0x08, 0x78), (0x28, 0x18), (0x48, 0x38), (0x68, 0x58).
 * Before each run t0 = number of slices + 1 and t1 = CONFIG base (t8).
 * NOTE(review): WRDQS_ADJUST_LOOP is not defined in this file (presumably
 * in ddr_leveling_define.h); how it uses a2/a3/t0/t1 and which registers
 * it clobbers must be confirmed there.
 */
wrdqs_adjust:
#if 1
#ifdef LVL_DEBUG
	PRINTSTR("\r\nwrdqs around 0x00 carefully adjust begin\r\n")
#endif
	// run 1: a2 = 0x08, a3 = 0x78
	GET_NUMBER_OF_SLICES
	daddu	t0, t0, 0x1
	dli     a2, 0x08
	dli     a3, 0x78
	dli     t1, 0x00
	or      t1, t1, t8
	WRDQS_ADJUST_LOOP

	// run 2: a2 = 0x28, a3 = 0x18
	GET_NUMBER_OF_SLICES
	daddu	t0, t0, 0x1
	dli     a2, 0x28
	dli     a3, 0x18
	dli     t1, 0x00
	or      t1, t1, t8
	WRDQS_ADJUST_LOOP

	// run 3: a2 = 0x48, a3 = 0x38
	GET_NUMBER_OF_SLICES
	daddu	t0, t0, 0x1
	dli     a2, 0x48
	dli     a3, 0x38
	dli     t1, 0x00
	or      t1, t1, t8
	WRDQS_ADJUST_LOOP

	// run 4: a2 = 0x68, a3 = 0x58
	GET_NUMBER_OF_SLICES
	daddu	t0, t0, 0x1
	dli     a2, 0x68
	dli     a3, 0x58
	dli     t1, 0x00
	or      t1, t1, t8
	WRDQS_ADJUST_LOOP


#ifdef LVL_DEBUG
	PRINTSTR("\r\nwrdqs around 0x00 carefully adjust end\r\n")
#endif
#endif

#if 0   //def  DEBUG_DDR_PARAM   //print registers
	PRINTSTR("\r\nThe MC param after carefully adjust is:\r\n")
	PRINT_THE_MC_PARAM
#endif
81:

#if 1
/* 8.1 adjust wrdata */
/*
 * For every slice, derive the write-data fields from the final dll_wrdqs:
 *   wrdqs_lt_half = (dll_wrdqs < WRDQS_LTHF_STD)
 *   dll_wrdq      = (dll_wrdqs - DLL_WRDQ_SUB) & 0x7f
 *   wrdq_lt_half  = (dll_wrdq  < WRDQ_LTHF_STD)
 * Clobbers: t0, t1, a0, ta0.
 */

/* t0 is the number of slices left; t1 is the current slice base */
	GET_NUMBER_OF_SLICES
	dli     t1, 0x20
	or      t1, t1, t8
dll_wrdata_set:
	lb	a0, OFFSET_DLL_WRDQS(t1) // get dll_wrdqs
	blt	a0, WRDQS_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQS_LTHF(t1) //set wrdqs_lt_half
2:
	// a0 still holds dll_wrdqs here on both paths
	dsubu	a0, a0, DLL_WRDQ_SUB
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, OFFSET_DLL_WRDQ(t1) // set dll_wrdata

	blt	a0, WRDQ_LTHF_STD, 1f
	nop
	li	ta0, 0x0
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
	b	2f
	nop
1:
	li	ta0, 0x1
	sb	ta0, OFFSET_WRDQ_LTHF(t1) //set wrdq_lt_half
2:
	// next slice
	daddu	t1, t1, 0x20
	dsubu	t0, t0, 0x1
	bnez	t0, dll_wrdata_set
	nop
#endif


/*
 * Scan the slices in ORDER_OF_UDIMM order for a wrdq_lt_half transition
 * from non-zero to zero, then program the per-slice field at bits 39:32
 * of the doubleword at (slice base + 0x10): 0 for order indices below the
 * transition, 1 from the transition onwards.  If any slice was set to 1,
 * or if the first slice in the order has a non-zero wrdq_lt_half, the
 * values at 0x1c0 (tRDDATA) and 0x1d0 (tPHY_WRDATA) are decremented.
 *
 * When GET_DIMM_TYPE leaves a1 non-zero, control goes to
 * rdimm_wrdq_lt_half_test instead.
 *
 * Order lookup: nibble i of ORDER_OF_UDIMM is the slice visited i-th; the
 * registers of slice n live at t8 | ((n + 1) * 0x20).
 *
 * Register roles:
 *   s7 = 1 once any slice has had its field set to 1
 *   t0 = last order index to visit (7, or 8 when bit 0 of 0x252(t8) is set)
 *   t2 = current order index
 *   t3 = order index at which the transition was found
 * The hexserial calls below are compiled in unconditionally (#if 1) and
 * clobber ra, a0, a1, a2, a3 (see file header).
 */
wrdq_lt_half_test:
	dli	s7, 0x0 // s7 records whether a 1-to-0 transition was applied
	GET_DIMM_TYPE
	bnez    a1, rdimm_wrdq_lt_half_test
	nop
	li      t0, 0x7 //only loop 7 times
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, 1f //when rd_after_write is enabled, the 9th device may not need leveling
	nop
	daddu   t0, t0, 0x1
1:
	dli     t2, 0x0
wrdq_lt_half_test_loop:
	// t1 = register base of the slice at order index t2
	dli	a0, ORDER_OF_UDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
#if 1
	// prints a0 (slice number + 1)
	bal	hexserial
	nop
#endif

	// step to the next index; running off the end means no transition
	daddu   t2, t2, 0x1
	bgt     t2, t0, record_slice_num
	nop
	// wrdq_lt_half of the slice at the previous index; keep scanning
	// while it is 0
	lb      a0, 0x0(t1)
	beqz    a0, wrdq_lt_half_test_loop
	nop

	// previous slice was non-zero: look at the slice at the new index
	dli	a0, ORDER_OF_UDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
#if 1
	bal	hexserial
	nop
#endif

	// non-zero followed by zero: transition found at index t2
	lb	a0, 0x0(t1)
	beqz    a0, record_slice_num
	nop
	b       wrdq_lt_half_test_loop
	nop

record_slice_num:
	move    t3, t2 //the slice number save in t3
	move    a0, t3
	bal	hexserial
	nop
	// NOTE(review): 0x8 equals t0 + 1 ("scan ran off the end") only when
	// t0 is 7.  When t0 is 8, a real transition found at index 8 also
	// takes this branch and skips the programming loop - confirm that
	// this is intended.
	beq     t3, 0x8, first_slice_wrdq_lt_half_test
	nop

wrdq_clkdelay_set:
//	li      t0, 0x7 //only loop 7 times
	dli     t2, 0x0
wrdq_clkdelay_set_loop:
	// visits order indices 1..t0; index 0 is never written here
	daddu   t2, t2, 0x1
	bgt     t2, t0, first_slice_wrdq_lt_half_test
	nop

	// t1 = (register base of the slice at order index t2) + 0x10
	dli	a0, ORDER_OF_UDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	daddu	t1, t1, 0x10

	// indices below t3 get 0, indices at or above t3 get 1
	ld      a0, 0x0(t1)
	blt     t2, t3, wrdq_clkdelay_set0
	nop
	b       wrdq_clkdelay_set1
	nop

wrdq_clkdelay_set0:
	// clear bits 39:32
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	sd      a0, 0x0(t1)
	b       wrdq_clkdelay_set_loop
	nop

wrdq_clkdelay_set1:
	dli 	s7, 0x1
	// set bits 39:32 to 1
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x0(t1)
	b       wrdq_clkdelay_set_loop
	nop

first_slice_wrdq_lt_half_test:
	// a slice was set to 1: always apply the timing decrement
	beq	s7, 0x1, trddata_tphywrdata_sub
	nop
	// otherwise decide from the low byte (wrdq_lt_half) of the first
	// slice in the order: zero means nothing more to do
	dli	a0, ORDER_OF_UDIMM
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8

	ld      a0, 0x0(t1)
	dli     ta0, 0x00000000000000ff
	and     a0, a0, ta0
	beqz    a0, write_leveling_exit
	nop


trddata_tphywrdata_sub:
	/* tRDDATA sub one: doubleword at 0x1c0 minus 1 */
	dli     t2, 0x1c0
	or      t2, t2, t8
	ld      a0, 0x0(t2)
	dli     ta0, 0x01
	dsubu   a0, a0, ta0
	sd      a0, 0x0(t2)
	/* tPHY_WRDATA sub one: the field starting at bit 32 of 0x1d0 minus 1 */
	dli     t2, 0x1d0
	or      t2, t2, t8
	ld      a0, 0x0(t2)
	dli     ta0, 0x100000000
	dsubu   a0, a0, ta0
	sd      a0, 0x0(t2)
	b       write_leveling_exit
	nop

rdimm_wrdq_lt_half_test:
/* identify wheather there is ecc slice */
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
//	dli     t2, 0x0
	bne     a0, t1, rdimm_wrdq_lt_half_test_3210
	nop

rdimm_wrdq_lt_half_test_83:
	li      t0, 0x4
	dli     t2, 0x0
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	lb      a0, 0x0(t1)
	daddu   t2, t2, 0x1
	beqz    a0, rdimm_wrdq_lt_half_test_loop_3210
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	lb      a0, 0x0(t1)
	beqz    a0, rdimm_record_slice_num_83210
	nop
	b       rdimm_wrdq_lt_half_test_loop_3210
	nop


rdimm_wrdq_lt_half_test_3210:
	li      t0, 0x4
	dli     t2, 0x1
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8

rdimm_wrdq_lt_half_test_loop_3210:
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	daddu   t2, t2, 0x1
	bgt     t2, t0, rdimm_wrdq_lt_half_test_4567
	nop
#ifdef LVL_DEBUG
	move	a0, t1
	bal	hexserial
	nop
#endif
	lb      a0, 0x0(t1)
	beqz    a0, rdimm_wrdq_lt_half_test_loop_3210
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	lb      a0, 0x0(t1)
	beqz    a0, rdimm_record_slice_num_3210
	nop
	b       rdimm_wrdq_lt_half_test_loop_3210
	nop

rdimm_record_slice_num_3210:
rdimm_record_slice_num_83210:
	move    t3, t2
#ifdef LVL_DEBUG
	PRINTSTR("\r\nt3=")
	move 	a0, t3
	bal	hexserial
	nop
#endif

/* identify wheather there is ecc slice */
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, rdimm_wrdq_clkdelay_set_3210
	nop
rdimm_wrdq_clkdelay_set_8:
	li      t0, 0x4
	dli     t2, 0x0
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	daddu	t1, t1, 0x10
	or	t1, t1, t8
//	daddu   t2, t2, 0x1
	ld      a0, 0x0(t1)
	blt     t2, t3, rdimm_wrdq_clkdelay_set0_8
	nop
	b       rdimm_wrdq_clkdelay_set1_8
	nop

rdimm_wrdq_clkdelay_set0_8:
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	sd      a0, 0x0(t1)
	dli     t1, 0xb0 //here set 0xb0 because it will sub 0x20 later
	or      t1, t1, t8
	b       rdimm_wrdq_clkdelay_set_loop_3210
	nop

rdimm_wrdq_clkdelay_set1_8:
	dli 	s7, 0x1
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x0(t1)
	dli     t1, 0xb0 //here set 0xb0 because it will sub 0x20 later
	or      t1, t1, t8
	b       rdimm_wrdq_clkdelay_set_loop_3210
	nop

rdimm_wrdq_clkdelay_set_3210:
	li      t0, 0x4
	dli     t2, 0x1
rdimm_wrdq_clkdelay_set_loop_3210:
1:
	daddu   t2, t2, 0x1
	bgt     t2, t0, rdimm_wrdq_lt_half_test_4567
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	daddu	t1, t1, 0x10
	or	t1, t1, t8
	ld      a0, 0x0(t1)
	blt     t2, t3, rdimm_wrdq_clkdelay_set0_3210
	nop
	b       rdimm_wrdq_clkdelay_set1_3210
	nop

rdimm_wrdq_clkdelay_set0_3210:
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	sd      a0, 0x0(t1)
	b       1b
	nop

rdimm_wrdq_clkdelay_set1_3210:
	dli 	s7, 0x1
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x0(t1)
	b       1b
	nop


rdimm_wrdq_lt_half_test_4567:
	li      t0, 0x8
	dli     t2, 0x5

rdimm_wrdq_lt_half_test_loop_4567:
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	daddu   t2, t2, 0x1
	bgt     t2, t0, slice_8_wrdq_lt_half_test
	nop
	lb      a0, 0x0(t1)
	beqz    a0, rdimm_wrdq_lt_half_test_loop_4567
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	lb      a0, 0x0(t1)
	beqz    a0, rdimm_record_slice_num_4567
	nop
	b       rdimm_wrdq_lt_half_test_loop_4567
	nop

rdimm_record_slice_num_4567:
	move    t3, t2 //the slice number save in t3
	dli     ta1, 0x0

rdimm_wrdq_clkdelay_set_4567:
	li      t0, 0x8 //only loop 7 times
	dli     t2, 0x5
rdimm_wrdq_clkdelay_set_loop_4567:
	daddu   t2, t2, 0x1
	bgt     t2, t0, slice_8_wrdq_lt_half_test
	nop
	dli     a0, ORDER_OF_RDIMM
	dli     ta0, 0x4
	mulou   a1,	t2, ta0
	dsrl    a0, a0, a1
	and     a0, a0, 0xf
	daddu   a0, a0, 0x1
	dli     ta0, 0x20
	mulou   t1, a0, ta0
	daddu   t1, t1, 0x10
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	blt     t2, t3, rdimm_wrdq_clkdelay_set0_4567
	nop
	b       rdimm_wrdq_clkdelay_set1_4567
	nop

rdimm_wrdq_clkdelay_set0_4567:
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	sd      a0, 0x0(t1)
	b       rdimm_wrdq_clkdelay_set_loop_4567
	nop

rdimm_wrdq_clkdelay_set1_4567:
	dli     s7, 0x1
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x0(t1)

	bnez    ta1, 1f
	nop
/* identify wheather there is ecc slice */
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
//	dli     t2, 0x0
	bne     a0, t1, noecc
	nop

	dli     ta0, 0x20
	dli     a0, 0x9
	mulou   t1, a0, ta0
	or      t1, t1, t8
	lb      a0, 0x0(t1)
	bnez    a0, 1f
	nop
noecc:
	dli     ta0, 0x20
	dli     a0, 0x4
	mulou   t1, a0, ta0
	or      t1, t1, t8
	lb      a0, 0x0(t1)
	bnez    a0, 1f
	nop

	dli     a0, 0x3
	mulou   t1, a0, ta0
	or      t1, t1, t8
	lb      a0, 0x0(t1)
	bnez    a0, 1f
	nop

	dli	    a0, 0x2
	mulou   t1, a0, ta0
	or      t1, t1, t8
	lb      a0, 0x0(t1)
	bnez    a0, 1f
	nop

	dli     a0, 0x1
	mulou   t1, a0, ta0
	or      t1, t1, t8
	lb      a0, 0x0(t1)
	bnez    a0, 1f
	nop

	ld      a0, 0x30(t8)
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x30(t8)

	ld      a0, 0x50(t8)
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x50(t8)

	ld      a0, 0x70(t8)
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x70(t8)

	ld      a0, 0x90(t8)
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x90(t8)

	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
//	dli     t2, 0x0
	bne     a0, t1, 1f
	nop
	ld      a0, 0x130(t8)
	dli     ta0, 0xffffff00ffffffff
	and     a0, a0, ta0
	dli     ta0, 0x0000000100000000
	or      a0, a0, ta0
	sd      a0, 0x130(t8)
1:
	daddu   ta1, ta1, 0x1
	b       rdimm_wrdq_clkdelay_set_loop_4567
	nop

slice_8_wrdq_lt_half_test:
	beq	s7, 0x1, rdimm_trddata_tphywrdata_sub
	nop
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, slice_3_wrdq_lt_half_test
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	t2, 0x0
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00000000000000ff
	and     a0, a0, ta0
	bnez    a0, rdimm_trddata_tphywrdata_sub
	nop
	b       slice_4_wrdq_lt_half_test
	nop

slice_3_wrdq_lt_half_test:
	beq	s7, 0x1, rdimm_trddata_tphywrdata_sub
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	t2, 0x1
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00000000000000ff
	and     a0, a0, ta0
	bnez    a0, rdimm_trddata_tphywrdata_sub
	nop

slice_4_wrdq_lt_half_test:
	beq	s7, 0x1, rdimm_trddata_tphywrdata_sub
	nop
	dli	a0, ORDER_OF_RDIMM
	dli	t2, 0x5
	dli	ta0, 0x4
	mulou	a1,	t2, ta0
	dsrl	a0, a0, a1
	and	a0, a0, 0xf
	daddu	a0, a0, 0x1
	dli	ta0, 0x20
	mulou	t1, a0, ta0
	or	t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00000000000000ff
	and     a0, a0, ta0
	beqz    a0, write_leveling_exit
	nop

rdimm_trddata_tphywrdata_sub:
	/* tRDDATA sub one */
	dli     t2, 0x1c0
	or      t2, t2, t8
	ld      a0, 0x0(t2)
	dli     ta0, 0x01
	dsubu   a0, a0, ta0
	sd      a0, 0x0(t2)
	/* tPHY_WRDATA sub one */
	dli     t2, 0x1d0
	or      t2, t2, t8
	ld      a0, 0x0(t2)
	dli     ta0, 0x100000000
	dsubu   a0, a0, ta0
	sd      a0, 0x0(t2)

/*
 * Leave write leveling: clear the low byte of the doubleword at 0x180(t8)
 * (the leveling mode byte that was set to 1 when write leveling started)
 * and continue with gate leveling.
 * Clobbers: t1, a0, ta0.
 */
write_leveling_exit:
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	// keep everything except bits 7:0
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	sd      a0, 0x0(t1)

	b       gate_leveling
//	b       100f
	nop

/*
 * Gate leveling setup.
 *   1. copy the byte at 0x169(t8) to 0x16a(t8) (cs_zq := cs_enable, per
 *      the disabled print below),
 *   2. twice: write 0 then 1 to the byte at 0x18(t8) and wait for the
 *      byte at 0x163(t8) to become non-zero,
 *   3. preset dll_gate of every slice (0, or 0x80 with DDR_DLL_BYPASS),
 *   4. write 2 to the leveling mode byte at 0x180(t8) and wait until the
 *      byte at 0x185(t8) reads 1.
 * None of the wait loops has a timeout.
 */
gate_leveling:
#if 1 //3a3000 new
//	PRINTSTR("\r\nset cs_zq to be same with cs_enable\r\n")
	lb      a0, 0x169(t8)
	sb      a0, 0x16a(t8)

reset_init_start_new:
	// toggle the byte at 0x18: 0 then 1
	dli     t1, 0x18
	or      t1, t1, t8
	dli	a0, 0x0
	sb      a0, 0x0(t1)

	dli     a0, 0x1
	sb      a0, 0x0(t1)

wait_init_done_new:
	// spin until byte 3 of the 0x160 register is non-zero
	dli     t1, 0x160
	or      t1, t1, t8
	lb      a0, 0x3(t1)
	beqz    a0, wait_init_done_new
	nop

reset_init_start_new2:
	// same toggle-and-wait sequence a second time
	dli     t1, 0x18
	or      t1, t1, t8
	dli	a0, 0x0
	sb      a0, 0x0(t1)

	dli     a0, 0x1
	sb      a0, 0x0(t1)

wait_init_done_new2:
	dli     t1, 0x160
	or      t1, t1, t8
	lb      a0, 0x3(t1)
	beqz    a0, wait_init_done_new2
	nop
#endif

	PRINTSTR("\r\nwrite leveling finish and gate leveling begin\r\n")
#ifdef  PRINT_DDR_LEVELING   //print registers
	PRINTSTR("\r\nThe MC param after write leveling is:\r\n")
	PRINT_THE_MC_PARAM
#endif

/* t0 = number of slices (including the ECC slice when present) */
	GET_NUMBER_OF_SLICES
	dli     t1, 0x20
	or      t1, t1, t8
dll_gate_set0:
	// initial dll_gate value; the second dli overrides the first when
	// DDR_DLL_BYPASS is defined
	dli	a0, 0x0
#ifdef DDR_DLL_BYPASS
	dli	a0, 0x80
#endif
	sb      a0, OFFSET_DLL_GATE(t1)
	subu    t0, t0, 0x1
	daddu	t1, t1, 0x20
	bnez    t0, dll_gate_set0
	nop

glvl_mode_set10:
	// leveling mode byte = 2 (write leveling used 1)
	dli     t1, 0x180
	or      t1, t1, t8
	dli	a0, 0x2
	sb      a0, 0x0(t1)

	dli     a1, 0x1
glvl_ready_sampling:
	// spin until the ready byte at 0x185 equals 1
	dli     t1, 0x180
	or      t1, t1, t8
	lb      a0, 0x5(t1)
	bne     a0, a1, glvl_ready_sampling
	nop

#ifdef SIGNAL_DEPICT_DEBUG
	PRINTSTR("\r\nthe signal depict begin:\r\n")
	dli	t1, 0x28     // save the init para before signal depict
	or 	t1, t1, t8
	lb	a0, 0x7(t1)
	dli	t1, 0x350
	or	t1, t1, t8
	sb	a0, 0x7(t1)
	dli	t1, 0x1c0
	or	t1, t1, t8
	lb	a0, 0x0(t1)
	dli	t1, 0x350
	or	t1, t1, t8
	sb	a0, 0x6(t1)

	dli	t1, 0x28
	or	t1, t1, t8
	dli	ta1, 0x180
	or	ta1, ta1, t8
	dli	t0, 0x8
	dli	t2, 0x0
	dli	s6, 0x0
	dli	s7, 0x0
t_glvl_req_set:
	bne	s6, 0x15, 1f
	nop
	dli	s6, 0x0		//reset trddata
	lb	a0, 0x356(t8)
	sb	a0, 0x1c0(t8)
	dsubu	t0, t0, 0x1
	beqz	t0, signal_depict_end
	nop
	daddu	t1, t1, 0x20
	daddu	ta1, ta1, 0x1
	PRINTSTR("\r\nthe above is slice ")
	dli	ta0, 0x8
	dsubu 	a0, ta0, t0
	bal	hexserial
	nop
	PRINTSTR("\r\n")
1:
	dli     ta0, 0x180
	or      ta0, ta0, t8
	dli     a0, 0x1
	sb      a0, 0x1(ta0)
	dli	a0, 0x0
	sb      a0, 0x1(ta0)

1:			//glvl_done_sampling
	dli     ta0, 0x180
	or      ta0, ta0, t8
	lb      a0, 0x6(ta0)
	bne     a0, 0x1, 1b
	nop

	lb	a0, 0x7(ta1)
	dli	ta0, 0x1
	and	a0, a0, ta0
	move	a1, a0
#if 1
	dli     ta0, 0x180
	or      ta0, ta0, t8
	dli     a0, 0x1
	sb      a0, 0x1(ta0)
	dli	a0, 0x0
	sb      a0, 0x1(ta0)

1:			//glvl_done_sampling
	dli     ta0, 0x180
	or      ta0, ta0, t8
	lb      a0, 0x6(ta0)
	bne     a0, 0x1, 1b
	nop

	lb	a0, 0x7(ta1)
	dli	ta0, 0x1
	and	a0, a0, ta0
	or	a0, a0, a1
	move	a1, a0
#endif
#if 1
	dli     ta0, 0x180
	or      ta0, ta0, t8
	dli     a0, 0x1
	sb      a0, 0x1(ta0)
	dli	a0, 0x0
	sb      a0, 0x1(ta0)

1:			//glvl_done_sampling
	dli     ta0, 0x180
	or      ta0, ta0, t8
	lb      a0, 0x6(ta0)
	bne     a0, 0x1, 1b
	nop

	lb	a0, 0x7(ta1)
	dli	ta0, 0x1
	and	a0, a0, ta0
	or	a0, a0, a1
#endif

	sll	a0, a0, 0x1f
	srl	a0, a0, s7
	or	t2, t2, a0
	daddu	s7, s7, 0x1
	blt	s7, 0x20, 1f // every 0x20 print the status
	nop
	move 	a0, t2
	bal	hexserial
	nop
	PRINTSTR(" ")
	dli	t2, 0x0
	dli	s7, 0x0
	daddu	s6, s6, 0x1
1:

#if 1
	lb	a0, 0x10(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, 0x10(t1)
	bnez	a0, 1f
	nop
	lb	a0, 0x1c0(t8)
	daddu	a0, a0, 0x1
	sb	a0, 0x1c0(t8)
1:
#else
	lb	a0, 0x10(t1)
	dsubu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
	sb	a0, 0x10(t1)
	bne	a0, 0x7f,1f
	nop
	lb	a0, 0x1c0(t8)
	dsubu	a0, a0, 0x1
	sb	a0, 0x1c0(t8)
1:
#endif
	b	t_glvl_req_set
	nop

signal_depict_end:
//identify wheather there is ecc slice
	GET_NUMBER_OF_SLICES
	dli	t1, 0x28
	or	t1, t1, t8
reset_rd_oe:
	dli	ta0, 0x350
	or	ta0, ta0, t8
	lb	a0, 0x7(ta0)
	sb	a0, 0x7(t1)
	sb	a0, 0x6(t1)
	daddu	t1, t1, 0x20
	dsubu	t0, t0, 0x1
	bnez	t0, reset_rd_oe
	nop

	dli	t1, 0x350 // reset trddata
	or	t1, t1, t8
	lb	a0, 0x6(t1)
	dli	t1, 0x1c0
	or	t1, t1, t8
	sb	a0, 0x0(t1)

	GET_NUMBER_OF_SLICES
	dli     t1, 0x20
	or      t1, t1, t8
11:
	dli	a0, 0x0
#ifdef DDR_DLL_BYPASS
	dli	a0, 0x80
#endif
	sb      a0, OFFSET_DLL_GATE(t1)
	subu    t0, t0, 0x1
	daddu	t1, t1, 0x20
	bnez    t0, 11b
	nop
	PRINTSTR("\r\n")
#endif

/* gate leveling set 1 to 0 */
/*
 * Gate leveling, first pass: for every slice, step dll_gate until the two
 * low bits of the leveling response both read 0.
 *
 * Register roles inside the loop:
 *   t0  = slices still to process (from GET_NUMBER_OF_SLICES)
 *   t1  = register base of the current slice (t8 | 0x20, +0x20 per slice)
 *   t2  = response pointer; the response byte is read at 0x7(t2) and t2
 *         advances by one byte per slice.  It must stay intact for the
 *         whole pass, so the DDR_DLL_BYPASS wrap code uses ta0 as scratch
 *         (as dll_gate_add1 does), not t2.
 *   ta2 = number of retries spent on the current slice; more than MDL_CNT
 *         retries branches to leveling_failed.  It is cleared before the
 *         loop and on every slice advance, never inside the retry loop,
 *         otherwise the retry limit could not be reached.
 */
	GET_NUMBER_OF_SLICES
	dli	t1, 0x20
	or	t1, t1, t8
	dli	t2, 0x180
	or	t2, t2, t8
	dli     ta2, 0x0
glvl_req_set0:
	// pulse the leveling request byte: write 1 then 0
	dli	a0, 0x1
	sb	a0, 0x181(t8)
	dli	a0, 0x0
	sb	a0, 0x181(t8)

glvl_done_sampling0:
	// spin until the done byte at 0x186 is non-zero
	lb	a0, 0x186(t8)
	beqz	a0, glvl_done_sampling0
	nop

glvl_resp_set0:
	// bits 1:0 of the response byte; both 0 ends the search for this slice
	lb	a0, 0x7(t2)
	dli	ta0, 0x3
	and	a0, a0, ta0
	beqz	a0, glvl_resp_set0_done
	nop

dll_gate_add0:
	// dll_gate = (dll_gate + 1) & 0x7f
	lb	a0, OFFSET_DLL_GATE(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	// bypass mode: reduce modulo (byte at 0x4(t8) + 2) by repeated
	// subtraction, then set bit 7
	lb	ta0, 0x4(t8)
	daddu	ta0, ta0, 0x2
1:
	blt	a0, ta0, 2f
	nop
	dsubu	a0, a0, ta0
	b	1b
	nop
2:
	ori	a0, 0x80
#endif
	sb	a0, OFFSET_DLL_GATE(t1)
	// if the low 7 bits wrapped to 0, move the rd_oe window one step
	dli	ta0, 0x7f
	and	a0, a0, ta0
	bnez	a0, 1f
	nop

	lb	a0, OFFSET_RDOE_BEGIN(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_BEGIN(t1)
	lb	a0, OFFSET_RDOE_END(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_END(t1)
	// macro defined elsewhere; changes a0, a1, ta0 (see file header)
	RDOE_SUB_TRDDATA_ADD
/*
	lb	a0, OFFSET_ODTOE_BEGIN(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_BEGIN(t1)
	lb	a0, OFFSET_ODTOE_END(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_END(t1)
*/
1:
	// give up once the retry budget for this slice is exhausted
	bgt     ta2, MDL_CNT, leveling_failed
	nop
	daddu   ta2, ta2, 0x1
	b 	glvl_req_set0
	nop

glvl_resp_set0_done:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n gate leveling 0 is found\r\n")
#endif
	// advance to the next slice and restart its retry counter
	dsubu	t0, t0, 0x1
	daddu	t1, t1, 0x20
	daddu	t2, t2, 0x1
	dli     ta2, 0x0
	bnez	t0, glvl_req_set0
	nop

#ifdef  PRINT_DDR_LEVELING   //print registers
	PRINTSTR("\r\nThe MC param after gate leveling 1 to 0 is:\r\n")
	PRINT_THE_MC_PARAM
#endif

/* unknown reason to reset init_start */
/*
 * Writes 0 then 1 to the byte at 0x18(t8), then spins until the byte at
 * 0x163(t8) becomes non-zero.  Same sequence as reset_init_start_new /
 * wait_init_done_new earlier in the file.  No timeout.
 * Clobbers: t1, a0.
 */
reset_init_start:
	dli     t1, 0x18
	or      t1, t1, t8
	dli	a0, 0x0
	sb      a0, 0x0(t1)

	dli     a0, 0x1
	sb      a0, 0x0(t1)

wait_init_done:
	// t1 is reloaded on every iteration; only a0 changes between polls
	dli     t1, 0x160
	or      t1, t1, t8
	lb      a0, 0x3(t1)
	beqz    a0, wait_init_done
	nop

/* 0 to 1 */
/*
 * Gate leveling, second pass: for every slice, keep stepping dll_gate
 * until bits 1:0 of the response have been non-zero on GATE_FILTER_LENGTH
 * consecutive requests, then move dll_gate back by
 * (GATE_FILTER_LENGTH - 1) steps, wrapping into the previous period (and
 * moving the rd_oe window back) when dll_gate is too small.
 *
 * Register roles:
 *   t0  = slices still to process
 *   t1  = register base of the current slice (t8 | 0x20, +0x20 per slice)
 *   t2  = response pointer (byte at 0x7(t2), +1 per slice)
 *   s7  = remaining consecutive non-zero responses required
 *   ta2 = retry counter, expected to be 0 on entry; more than MDL_CNT
 *         retries branches to leveling_failed
 *   t3  = scratch (mask) - its previous value is destroyed
 */
	GET_NUMBER_OF_SLICES
	dli	t1, 0x20
	or	t1, t1, t8
	dli	t2, 0x180
	or	t2, t2, t8
	dli	s7, GATE_FILTER_LENGTH
glvl_req_set1:
#ifdef LVL_DEBUG
	PRINTSTR("\r\ngate leveling req\r\n")
#endif
	// pulse the leveling request byte: write 1 then 0
	dli	a0, 0x1
	sb	a0, 0x181(t8)
	dli	a0, 0x0
	sb	a0, 0x181(t8)

glvl_done_sampling1:
	// spin until the done byte at 0x186 is non-zero
	lb	a0, 0x186(t8)
	beqz	a0, glvl_done_sampling1
	nop

glvl_resp_set1:
	// bits 1:0 of the response byte; non-zero counts towards the filter
	lb	a0, 0x7(t2)
	dli	ta0, 0x3
	and	a0, a0, ta0
	bnez	a0, glvl_resp_set1_done
	nop
	// response was 0: the run is broken, restart the filter
	dli	s7, GATE_FILTER_LENGTH

dll_gate_add1:
	// dll_gate = (dll_gate + 1) & 0x7f
	lb	a0, OFFSET_DLL_GATE(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	// bypass mode: reduce modulo (byte at 0x4(t8) + 2) by repeated
	// subtraction, then set bit 7
	lb	ta0, 0x4(t8)
	daddu	ta0, ta0, 0x2
1:
	blt	a0, ta0, 2f
	nop
	dsubu	a0, a0, ta0
	b	1b
	nop
2:
	ori	a0, 0x80
#endif
	sb	a0, OFFSET_DLL_GATE(t1)
	// if the low 7 bits wrapped to 0, move the rd_oe window one step
	dli	t3, 0x7f
	and	a0, a0, t3
	bnez	a0, 1f
	nop

	lb	a0, OFFSET_RDOE_BEGIN(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_BEGIN(t1)
	lb	a0, OFFSET_RDOE_END(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_END(t1)
	// macro defined elsewhere; changes a0, a1, ta0 (see file header)
	RDOE_SUB_TRDDATA_ADD
/*
	lb	a0, OFFSET_ODTOE_BEGIN(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_BEGIN(t1)
	lb	a0, OFFSET_ODTOE_END(t1)
	daddu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_END(t1)
*/
1:
	// give up once the retry budget for this slice is exhausted
	bgt     ta2, MDL_CNT,  leveling_failed
	nop
	daddu   ta2, ta2, 0x1
	b 	    glvl_req_set1
	nop

glvl_resp_set1_done:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n gate leveling 1 is found @ slice")
	dli	a0, 0x8
	dsubu	a0, a0, t0
	bal 	hexserial
	nop
#endif
	// one more non-zero response; keep stepping until the count is zero
	dsubu	s7, s7, 0x1
	bnez	s7, dll_gate_add1
	nop
	dli	s7, GATE_FILTER_LENGTH

// undo the extra steps taken while filtering
	lb	a0, OFFSET_DLL_GATE(t1)
	and	a0, a0, 0x7f
	dli	ta0, GATE_FILTER_LENGTH
	dsubu	ta0, ta0, 0x1
	blt	a0, ta0, 1f	// a0 < ta0: take the wrap-around path at 1:, otherwise subtract ta0 directly
	nop
	dsubu	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	ori	a0, a0, 0x80
#endif
	sb	a0, OFFSET_DLL_GATE(t1)
	b	2f
	nop
1:
	// wrap-around: dll_gate = dll_gate + period - (GATE_FILTER_LENGTH - 1)
	// where period (a1) is 0x80, or (byte at 0x4(t8) + 2) in bypass mode
	dli	a1, 0x80
#ifdef DDR_DLL_BYPASS
	lb	a1, 0x4(t8)
	daddu	a1, a1, 0x2
#endif
	// the byte is reloaded unmasked here, so bit 7 (if set) is carried
	// through the arithmetic
	lb	a0, OFFSET_DLL_GATE(t1)
	dli	ta0, GATE_FILTER_LENGTH
	dsubu	ta0, ta0, 0x1
	daddu	a0, a0, a1
	dsubu	a0, a0 ,ta0
	sb	a0, OFFSET_DLL_GATE(t1)

	// moved back across a period boundary: step the rd_oe window back too
	lb	a0, OFFSET_RDOE_BEGIN(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_BEGIN(t1)
	lb	a0, OFFSET_RDOE_END(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_END(t1)
	// macro defined elsewhere; NOTE(review): confirm its clobber list
	RDOE_ADD_TRDDATA_SUB
/*
	lb	a0, OFFSET_ODTOE_BEGIN(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_BEGIN(t1)
	lb	a0, OFFSET_ODTOE_END(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_END(t1)
*/
2:

	// advance to the next slice and restart its retry counter
	dsubu	t0, t0, 0x1
	daddu	t1, t1, 0x20
	daddu	t2, t2, 0x1
	dli     ta2, 0x0
	bnez	t0, glvl_req_set1
	nop



#ifdef  PRINT_DDR_LEVELING   //print registers
	PRINTSTR("\r\nThe MC param after gate leveling 0 to 1 is:\r\n")
	PRINT_THE_MC_PARAM
#endif

#ifdef	PREAMBLE_CHECK_DEBUG

	dli	s7, 0x8
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, 1f //when rd_after_write enabled, the 9th device may don't need leveling
	nop
	daddu	s7, s7, 0x1
1:
//	dli	s7, 0x5
	dli	t1, 0x28
	or 	t1, t1, t8
	dli	t2, 0x180
	or	t2, t2, t8

	dli     ta1,     0x0
preamble_check_init:
/*
 * Start (or restart) the preamble check of the slice addressed by t1/t2.
 * The gate delay at 0x10(t1) is moved back by s6 steps, wrapping by one
 * period (a3) and decrementing rd_oe begin/end when it would go below zero.
 * Then t3 = 2, ta2 = 5, s6 = (s6 & 0x7f) - 6 and a leveling request is
 * issued through glvl_redo_req_set_0, which dispatches on t3 == 2 to
 * glvl_check_preamble.
 */
/* check that the preamble exists */
	PRINTSTR("\r\nPREAMBLE CHECK!!\r\n")
// set the gate signal 0.75 period before
	dli 	s6,	PREAMBLE_LENGTH_3A9 //s6 represents 0.75 period to be checked
	dli	a3, 0x80		// a3 = length of one period in delay steps
	dli	ta0, 0x0
	or	ta0, ta0, t8
	lb	a0, 0x0(ta0)		// byte 0 of the config space selects the constant
	beq	a0, 0x2, 1f		// value 2: keep PREAMBLE_LENGTH_3A9
	nop
	dli	s6, PREAMBLE_LENGTH_3A8
1:
#ifdef DDR_DLL_BYPASS
	// bypass mode: period a3 = (byte at 0x4(t8) & 0x7f) + 2,
	// s6 = a3 - a3/4 (0.75 period), masked to 7 bits
	lb	a2, 0x4(t8)
	and	a2, a2, 0x7f
	daddu	a2, a2, 0x2
	move	a3, a2
	dsrl	a2, a2, 0x2
	dsubu	a2, a3, a2
	dli	ta0, 0x7f
	and	a2, a2, ta0
	move 	s6, a2
#endif

	lb	a0, 0x7(t1)	// if rd_oe_end >= 4 then set rd_oe_end = 3
	blt	a0, 0x4, 1f
	nop
	dli	a0, 0x3
	sb	a0, 0x7(t1)
1:
	lb	a0, 0x6(t1)	// same clamp for rd_oe_begin
	blt	a0, 0x4, 1f
	nop
	dli	a0, 0x3
	sb	a0, 0x6(t1)
1:

	// gate delay (low 7 bits of 0x10(t1)) minus s6
	lb	a0, 0x10(t1)
	and	a0, a0, 0x7f
	bgeu	a0, s6, 1f		// no wrap needed: plain subtraction at 1:
	nop
	// wrap: gate = gate + period - s6, and rd_oe begin/end go back by one
	daddu	a0, a0, a3
	dsubu	a0, a0, s6
#if 0
	move	ta0, a0
	bal	hexserial
	nop
2:
	bal	hexserial
	nop
	lb	a0, 0x10(t1)
	daddu	a0, a0, 0x1
	sb	a0, 0x10(t1)
	bne	a0, ta0, 2b
	nop
#endif
#if 1
#ifdef DDR_DLL_BYPASS
	ori	a0, a0, 0x80		// bit 7 is kept set in bypass mode
#endif
	sb	a0, 0x10(t1)
#endif
	lb	a0, 0x7(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x7(t1)
	lb	a0, 0x6(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x6(t1)
	RDOE_ADD_TRDDATA_SUB
	b	3f			// skip the no-wrap store
	nop
1:
	dsubu	a0, a0, s6
#ifdef DDR_DLL_BYPASS
	ori	a0, a0, 0x80
#endif
	sb	a0, 0x10(t1)
3:
/*	dli	a0, 0xa1
	sb	a0, 0x10(t1)*/
	// write 1, 0, 1, 0 to the byte at 0x181(t8), the same byte that
	// glvl_redo_req_set_0 pulses to issue a leveling request
	dli ta0, 0x180
	or  ta0, ta0, t8
	li	a0, 0x1
	sb  a0, 0x1(ta0)
	li	a0, 0x0
	sb	a0, 0x1(ta0)
	li	a0, 0x1
	sb  a0, 0x1(ta0)
	li	a0, 0x0
	sb	a0, 0x1(ta0)

	dli t3, 0x2			// state 2: next response goes to glvl_check_preamble
	dli	ta2, 0x5		// countdown used by test_continuous5_0
	and	s6, s6, 0x7f
	dsubu	s6, s6, 0x6		// remaining step budget for the scan
	b 	glvl_redo_req_set_0
	nop
glvl_check_preamble:
/*
 * One step of the preamble scan (reached from glvl_redo_req_set_0 when
 * t3 == 2). s6 is decremented but never drops below 1. The low two bits of
 * the sample at 0x7(t2) decide the path: non-zero goes to
 * test_continuous5_0; zero advances the gate delay by one step, reloads the
 * countdown ta2 with 5 and issues another request.
 */


	dsubu	s6, s6, 0x1
	bnez	s6, 1f
	nop
	daddu	s6, s6, 0x1		// hold s6 at 1 once the budget is used up
1:

	lb      a0, 0x7(t2)
	dli     ta0, 0x3
	and     a0, a0, ta0

	bnez	a0, test_continuous5_0
	nop
#ifdef LVL_DEBUG
	PRINTSTR("The 1 is not found\r\n")
#endif
	// gate delay + 1, kept to 7 bits
	lb	a0, 0x10(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and 	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	// bypass mode: reduce modulo the period (byte at 0x4(t8) + 2) and set bit 7
	lb	ta0, 0x4(t8)
	daddu	ta0, ta0, 0x2
1:
	blt	a0, ta0, 2f
	nop
	dsubu	a0, a0, ta0
	b	1b
	nop
2:
	ori	a0, 0x80
#endif
	sb	a0, 0x10(t1)
	dli	ta0, 0x7f
	and	a0, a0, ta0
	bnez	a0,	1f		// gate did not wrap to 0: rd_oe unchanged
	nop

	// gate wrapped to 0: rd_oe begin/end move forward by one
	lb	a0, 0x6(t1)
	daddu	a0, a0, 0x1
	sb	a0, 0x6(t1)
	lb	a0, 0x7(t1)
	daddu	a0, a0, 0x1
	sb	a0, 0x7(t1)
	lb	a0, 0x7(t1)		// a0 = new rd_oe_end, loaded right before the macro
	RDOE_SUB_TRDDATA_ADD
1:
	dli	ta2, 0x5		// a zero sample restarts the countdown
	b 	glvl_redo_req_set_0
	nop

test_continuous5_0:
/*
 * Reached from glvl_check_preamble when the sample is non-zero.
 * ta2 counts such samples down from 5 (it is reloaded with 5 on every zero
 * sample). When ta2 reaches 0:
 *   - s6 == 1        -> glvl_check_preamble_end (success)
 *   - ta1 > GCL_CNT  -> leveling_failed (too many retries)
 *   - otherwise      -> ta1++ and glvl_check_preamble_fail
 * While ta2 is still non-zero the gate delay is advanced by one step and
 * another request is issued.
 */
	dsubu	ta2, ta2, 0x1
	bnez	ta2, 1f
	nop
	beq	s6, 0x1, glvl_check_preamble_end
	nop
	bgt     ta1, GCL_CNT, leveling_failed
	nop
	daddu   ta1, ta1, 0x1
	b	glvl_check_preamble_fail
	nop
1:
#ifdef PRINT_PREAMBLE_CHECK
	PRINTSTR("The 1 found in preamble test@")
	move	a0, s6
	bal 	hexserial
	nop
	move	a0, ta2
	bal 	hexserial
	nop
	PRINTSTR("\r\n")
#endif

	// gate delay + 1, kept to 7 bits
	lb	a0, 0x10(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and 	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	// bypass mode: reduce modulo the period (byte at 0x4(t8) + 2) and set bit 7
	lb	ta0, 0x4(t8)
	daddu	ta0, ta0, 0x2
1:
	blt	a0, ta0, 2f
	nop
	dsubu	a0, a0, ta0
	b	1b
	nop
2:
	ori	a0, 0x80
#endif
	sb	a0, 0x10(t1)
	dli	ta0, 0x7f
	and	a0, a0, ta0
	bnez	a0,	1f		// gate did not wrap to 0: rd_oe unchanged
	nop

	// gate wrapped to 0: rd_oe begin/end move forward by one
	lb	a0, 0x6(t1)
	daddu	a0, a0, 0x1
	sb	a0, 0x6(t1)
	lb	a0, 0x7(t1)
	daddu	a0, a0, 0x1
	sb	a0, 0x7(t1)
	lb	a0, 0x7(t1)		// a0 = new rd_oe_end, loaded right before the macro
	RDOE_SUB_TRDDATA_ADD
1:
	b	glvl_redo_req_set_0
	nop

glvl_check_preamble_fail:
/*
 * The preamble check of the current slice failed. Print the remaining step
 * budget (s6), move rd_oe begin/end back by one, set t3 = 0 and fall
 * through into glvl_redo_req_set_0 so the gate search for this slice
 * starts over.
 */
	PRINTSTR("\r\nThe preamble check failed @")
	move	a0, s6
	bal 	hexserial
	nop
	PRINTSTR("\r\n")

	dli	s6, 0x0
	lb	a0, 0x6(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x6(t1)
	lb	a0, 0x7(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x7(t1)
	bnez	a0, 1f			// rd_oe_end still non-zero: nothing more to do
	nop
	// NOTE(review): the macro is only invoked when rd_oe_end reached 0,
	// while other rd_oe decrements in this file invoke it unconditionally.
	// Confirm this is intended.
	PRINTSTR("\r\nThe rd_oe become 0 in the preamble check!\r\n")
	RDOE_ADD_TRDDATA_SUB
1:


	dli	t3, 0x0			// state 0 for the dispatcher below
glvl_redo_req_set_0:
/*
 * Issue one leveling request and dispatch on the state kept in t3.
 * The request is a 1 then 0 written to the byte at 0x181(t8); completion is
 * polled at the byte at 0x186(t8) until it reads 1.
 *   t3 == 1 -> glvl_redo_resp_set1_0
 *   t3 == 2 -> glvl_check_preamble
 *   else    -> handled here: t3 becomes 1; if the low two bits of the
 *              sample at 0x7(t2) are 0 the request is simply repeated in
 *              state 1, otherwise the gate delay is advanced by one step,
 *              t3 goes back to 0 and the request is repeated.
 */
	dli     ta0, 0x180
	or      ta0, ta0, t8
	dli     a0, 0x1
	sb      a0, 0x1(ta0)
	dli	a0, 0x0
	sb      a0, 0x1(ta0)

1:			//glvl_done_sampling
	dli     ta0, 0x180
	or      ta0, ta0, t8
	lb      a0, 0x6(ta0)
	bne     a0, 0x1, 1b
	nop

#ifdef LVL_DEBUG
	PRINTSTR("\r\npreamble req\r\nrd_oe is")
	ld	a0, 0x0(t1)
	dsrl	a0, a0, 48
	and	a0, a0, 0xffff
	bal	hexserial
	nop
	lb	a0, 0x1c0(t8)
	bal	hexserial
	nop
	PRINTSTR("\r\n t1 & t2 is")
	move	a0, t1
	bal	hexserial
	nop
	move	a0, t2
	bal	hexserial
	nop
	PRINTSTR("\r\n 0x118")
	lb	a0, 0x118(t8)
	bal	hexserial
	nop
#endif

	beq	t3, 0x1, glvl_redo_resp_set1_0
	nop

	beq	t3, 0x2, glvl_check_preamble
	nop


	dli	t3, 0x1
#ifdef LVL_DEBUG
	ld	a0, 0x188(t8)
	dsrl	a0, a0, 32
	bal	hexserial
	nop
#endif
	lb      a0, 0x7(t2)
	dli     ta0, 0x3
	and     a0, a0, ta0
	beq     a0, 0x0, glvl_redo_set0_end	// sample is 0: continue in state 1
	nop
#ifdef LVL_DEBUG
	PRINTSTR("\r\nglvl redo set 0 add\r\n")
#endif
	// gate delay + 1, kept to 7 bits
	lb	a0, 0x10(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and 	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	// bypass mode: reduce modulo the period (byte at 0x4(t8) + 2) and set bit 7
	lb	ta0, 0x4(t8)
	daddu	ta0, ta0, 0x2
1:
	blt	a0, ta0, 2f
	nop
	dsubu	a0, a0, ta0
	b	1b
	nop
2:
	ori	a0, 0x80
#endif
	sb	a0, 0x10(t1)
#ifdef LVL_DEBUG
	bal	hexserial
	nop
	lb	a0, 0x10(t1)
#endif
	dli	ta0, 0x7f
	and	a0, a0, ta0
	dli	t3, 0x0			// stay in state 0 while the sample is non-zero
	bnez	a0,	glvl_redo_set0_end	// gate did not wrap to 0
	nop

#ifdef LVL_DEBUG
	PRINTSTR("\r\nrd_oe add 1\r\n")
#endif
	/* rd_oe_begin and rd_oe_end add 1 (bytes 6 and 7 of the dword at 0x0(t1)) */
	ld      a0, 0x0(t1)
	dli     ta0, 0x0101000000000000
	daddu   a0, a0, ta0
	sd      a0, 0x0(t1)
	lb	a0, 0x7(t1)
	RDOE_SUB_TRDDATA_ADD
	/* odt_oe_begin and odt_oe_end add 1 */
	// NOTE(review): the constant touches bytes 2 and 3 of the dword at
	// 0x8(t1), i.e. slice offsets 0x12/0x13, while OFFSET_ODTOE_BEGIN/END are
	// defined as 0x14/0x15. Confirm which is correct.
	ld      a0, 0x8(t1)
	dli     ta0, 0x0000000001010000
	daddu   a0, a0, ta0
	sd      a0, 0x8(t1)

glvl_redo_set0_end:
	b	glvl_redo_req_set_0
	nop

glvl_redo_resp_set1_0:
/*
 * State 1 response handler (reached from glvl_redo_req_set_0 when t3 == 1).
 * A non-zero sample (low two bits of 0x7(t2)) hands over to
 * preamble_check_init. A zero sample advances the gate delay by one step,
 * bumping rd_oe/odt_oe when the delay wraps to 0, and repeats the request
 * with t3 unchanged.
 */
#ifdef LVL_DEBUG
	PRINTSTR("\r\nglvl redo resp set 1\r\n")
#endif
	lb      a0, 0x7(t2)
	dli     ta0, 0x3
	and     a0, a0, ta0
	bnez    a0, preamble_check_init
	nop

	// gate delay + 1, kept to 7 bits
	lb	a0, 0x10(t1)
	daddu	a0, a0, 0x1
	dli	ta0, 0x7f
	and 	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	// bypass mode: reduce modulo the period (byte at 0x4(t8) + 2) and set bit 7
	lb	ta0, 0x4(t8)
	daddu	ta0, ta0, 0x2
1:
	blt	a0, ta0, 2f
	nop
	dsubu	a0, a0, ta0
	b	1b
	nop
2:
	ori	a0, 0x80
#endif
	sb	a0, 0x10(t1)
#ifdef LVL_DEBUG
	bal	hexserial
	nop
	lb	a0, 0x10(t1)
#endif
	dli	ta0, 0x7f
	and	a0, a0, ta0
	bnez	a0,	1f		// gate did not wrap to 0
	nop

#ifdef LVL_DEBUG
	PRINTSTR("\r\nrd oe add 1 @ glvl redo add\r\n")
#endif
	/* rd_oe_begin and rd_oe_end add 1 (bytes 6 and 7 of the dword at 0x0(t1)) */
	ld      a0, 0x0(t1)
	dli     ta0, 0x0101000000000000
	daddu   a0, a0, ta0
	sd      a0, 0x0(t1)
	lb	a0, 0x7(t1)
	RDOE_SUB_TRDDATA_ADD
	/* odt_oe_begin and odt_oe_end add 1 (bytes 2 and 3 of the dword at 0x8(t1)) */
	ld      a0, 0x8(t1)
	dli     ta0, 0x0000000001010000
	daddu   a0, a0, ta0
	sd      a0, 0x8(t1)

1:

	b	glvl_redo_req_set_0
	nop


glvl_check_preamble_end:
/*
 * The preamble check of the current slice (addressed through t1) succeeded.
 *   1. If rd_oe_end (0x7(t1)) is >= 4, subtract 4 and store the result in
 *      both rd_oe_end and rd_oe_begin (0x6(t1)).
 *   2. Move the gate delay (low 7 bits of 0x10(t1)) back by 4 steps. When
 *      the delay is below 4 it wraps by one period (a3) and rd_oe begin/end
 *      are moved back by one.
 * Fix: the wrap path used to fall through into the no-wrap path, which
 * subtracted 4 from whatever was left in a0 after RDOE_ADD_TRDDATA_SUB and
 * overwrote the gate delay that had just been stored. The wrap path now
 * branches over it, as the equivalent code in preamble_check_init does.
 */
#ifdef  PRINT_PREAMBLE_CHECK   //print registers
	PRINTSTR("\r\nThe MC param after preamble check is:\r\n")
	PRINT_THE_MC_PARAM
#endif
	dli	s6, 0x0
	PRINTSTR("\r\nThe preamble check success\r\n")

	lb	a0, 0x7(t1)
	blt	a0, 0x4, 1f
	nop
	dsubu	a0, a0, 0x4
	sb	a0, 0x7(t1)
	sb	a0, 0x6(t1)
	RDOE_ADD_TRDDATA_SUB
1:
	dli	a3, 0x80		// a3 = length of one period in delay steps
#ifdef DDR_DLL_BYPASS
	lb	a3, 0x4(t8)		// bypass mode: period = (byte at 0x4(t8) + 2) & 0x7f
	daddu	a3, a3, 0x2
	and	a3, a3, 0x7f
#endif
	lb	a0, 0x10(t1)
	and	a0, a0, 0x7f
	bgeu	a0, 0x4, 1f		// no wrap needed: plain subtraction at 1:
	nop
	// wrap: gate = gate + period - 4, and rd_oe begin/end go back by one
	daddu	a0, a0, a3
	dsubu	a0, a0, 0x4
#ifdef DDR_DLL_BYPASS
	ori	a0, a0, 0x80		// bit 7 is kept set in bypass mode
#endif
	sb	a0, 0x10(t1)

	lb	a0, 0x7(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x7(t1)
	lb	a0, 0x6(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x6(t1)
	RDOE_ADD_TRDDATA_SUB
	b	2f			// gate delay already stored; skip the no-wrap path
	nop
1:
	dsubu	a0, a0, 0x4
#ifdef DDR_DLL_BYPASS
	ori	a0, a0, 0x80
#endif
	sb	a0, 0x10(t1)
2:

#if 1
/* unknown reason to reset init_start */
	dli     ta0, 0x18
	or      ta0, ta0, t8
	dli     a0, 0x0
	sb      a0, 0x0(ta0)

	dli     ta0, 0x18
	or      ta0, ta0, t8
	dli     a0, 0x1
	sb      a0, 0x0(ta0)
1:
	dli     ta0, 0x160
	or      ta0, ta0, t8
	lb      a0, 0x3(ta0)
	beqz    a0, 1b
	nop
#endif

#if 0
get_burst_length: //save in t9
	dli	ta0, 0x168
	or	ta0, ta0, t8
	lb	t9, 0x4(ta0)
	daddu	t9, t9, 0x1
	dsrl	t9, t9, 0x1

	dli	ta0, 0x180//send glvl request
	or	ta0, ta0, t8
	dli	a0, 0x1
	sb	a0, 0x1(ta0)
1:
	lb	a0, 0x6(ta0) //glvl done
	bne	a0, 0x1, 1b
	nop
	lb	s3, 0x7(t2)

	dli	ta0, 0x180
	or	ta0, ta0, t8
	dli	a0, 0x1
	sb	a0, 0x1(ta0)
1:
	lb	a0, 0x6(ta0)
	bne	a0, 0x1, 1b
	nop
	lb	ta2, 0x7(t2)

//glvl response check
	dli	ta0, 0x1c
	and	s3, s3, ta0
	and	ta2, ta2, ta0
	dsrl	s3, s3, 0x2
	dsrl	ta2, ta2, 0x2
	blt	s3, 0x4, 1f
	nop
	or	ta2, ta2, 0x8
1:
	dsubu	ta2, ta2, s3
	beq	ta2, t9, glvl_last_check_end
	nop

	lb	a0, 0x7(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x7(t1)
	lb	a0,	0x6(t1)
	dsubu	a0, a0, 0x1
	sb	a0, 0x6(t1)
	RDOE_ADD_TRDDATA_SUB
	PRINTSTR("\r\nThe edges number is incorrect!\r\n")
	b	preamble_check_init
	nop
#endif
glvl_last_check_end:
	// Advance to the next slice: t1 moves to the next 0x20-byte parameter
	// group, t2 to the next sample byte, the retry counter ta1 is cleared,
	// and the preamble check repeats until the slice counter s7 reaches 0.
	daddu	t1, t1, 0x20
	daddu	t2, t2, 0x1
	dsubu	s7, s7, 0x1
	dli     ta1,     0x0
	bnez	s7, preamble_check_init
	nop
#endif

/*
 * set rddqs_lt_half
 * For every slice (count in t0 from GET_NUMBER_OF_SLICES, t1 walks the
 * 0x20-byte parameter groups starting at t8 | 0x20):
 *   a0 = (dll_gate + dll_wrdq) & 0x7f
 *   rddqs_lt_half = 1 when a0 >= RDDQS_LTHF_STD1 or a0 < RDDQS_LTHF_STD2,
 *   otherwise 0.
 * In DDR_DLL_BYPASS mode dll_gate is first rescaled to the non-bypass range
 * as gate * 128 / (dll_ck + 2).
 * Fix: dll_ck was loaded with lw, which also reads bytes 5..7 of the
 * register at 0x4(t8); byte 7 is written as a separate control field at the
 * end of this routine, so the divisor could be wrong. It is now loaded as a
 * byte and masked to 7 bits, matching the load in preamble_check_init.
 */
	GET_NUMBER_OF_SLICES
	dli	t1, 0x20
	or	t1, t1, t8
rddqs_lt_half_set:
#ifdef LVL_DEBUG
	PRINTSTR("\r\nsetting rddqs lt_half\r\n")
#endif
	lb	a0, OFFSET_DLL_GATE(t1)
	dli	ta0, 0x7f
	and	a0, a0, ta0
#ifdef DDR_DLL_BYPASS
	dsll    a0, a0, 0x7 // x 128
	lb      ta1, 0x4(t8) //get dll_ck value, store at ta1
	and     ta1, ta1, 0x7f
	daddu   ta1, ta1, 0x2
	divu    a0, a0, ta1 //get dll_gate, no bypass mode
#endif
	lb	a1, OFFSET_DLL_WRDQ(t1)
	daddu	a0, a0, a1
	and	a0, a0, ta0		// ta0 still holds 0x7f
#if 0
	move	a1, a0
	bal	hexserial
	nop
	move	a0, a1
#endif
	bgeu	a0, RDDQS_LTHF_STD1, rddqs_lthalf_set1
	nop
	// with RDDQS_LTHF_STD2 == 0 this unsigned compare is never true
	bltu	a0,	RDDQS_LTHF_STD2, rddqs_lthalf_set1
	nop
	b	rddqs_lthalf_set0
	nop
rddqs_lthalf_set0:
	dli	a0, 0x0
	sb	a0, OFFSET_RDDQS_LTHF(t1)
	b	1f
	nop
rddqs_lthalf_set1:
	dli	a0, 0x1
	sb	a0, OFFSET_RDDQS_LTHF(t1)
1:
	daddu	t1, t1, 0x20
	dsubu	t0, t0, 0x1
	bnez	t0, rddqs_lt_half_set
	nop

#if 1
/* unknown reason to reset init_start */
	dli     ta0, 0x18
	or      ta0, ta0, t8
	dli     a0, 0x0
	sb      a0, 0x0(ta0)

	dli     ta0, 0x18
	or      ta0, ta0, t8
	dli     a0, 0x1
	sb      a0, 0x0(ta0)
1:
	dli     ta0, 0x160
	or      ta0, ta0, t8
	lb      a0, 0x3(ta0)
	beqz    a0, 1b
	nop
#endif

#if 1
	/*
	 * Move the gate delay of every slice back by a fixed amount a2:
	 * DLL_GATE_SUB normally, or (byte at 0x4(t8) + 2) / 4 in bypass mode.
	 * a3 is one period (0x80, or byte at 0x4(t8) + 2 in bypass mode).
	 * A delay smaller than a2 wraps by one period and rd_oe begin/end are
	 * moved back by one. t0 = slice count, t1 = current parameter group.
	 */
	GET_NUMBER_OF_SLICES
	dli	t1, 0x20
	or	t1, t1, t8
dll_gate_set_loop:
	beqz     t0, gate_sub_end
	nop
#ifdef LVL_DEBUG
	PRINTSTR("\r\n setting dll_gate_sub \r\n")
#endif
#ifdef DDR_DLL_BYPASS
	lb      a2, 0x4(t8) //dll_value_ck
	daddu   a2, a2, 0x2
	move	a3, a2
	dsrl    a2, a2, 0x2
	dli     ta0, 0xff
	and     a2, a2, ta0
#else
	dli	a3, 0x80
	dli     a2, DLL_GATE_SUB
#endif
	lb      a0, OFFSET_DLL_GATE(t1)
	and	a0, a0, 0x7f
	bgeu    a0, a2, dll_gate_sub20	// no wrap needed
	nop
	// wrap: gate = gate + period - a2
#ifdef DDR_DLL_BYPASS
	ori     a0, a0, 0x80
	dsubu	a0, a0, a2
	daddu	a0, a0, a3
#else
	daddu	a0, a0, a3
	dsubu	a0, a0, a2
#endif
	sb      a0, OFFSET_DLL_GATE(t1)

	// the wrap costs one rd_oe step
	lb	a0, OFFSET_RDOE_BEGIN(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_BEGIN(t1)
	lb	a0, OFFSET_RDOE_END(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_RDOE_END(t1)
	RDOE_ADD_TRDDATA_SUB
/*
	lb	a0, OFFSET_ODTOE_BEGIN(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_BEGIN(t1)
	lb	a0, OFFSET_ODTOE_END(t1)
	dsubu	a0, a0, 0x1
	sb	a0, OFFSET_ODTOE_END(t1)
*/
	daddu	t1, t1, 0x20
	dsubu	t0, t0, 0x1
	b       dll_gate_set_loop
	nop
dll_gate_sub20:
	// no-wrap path: gate = gate - a2
	dsubu    a0, a0, a2
#ifdef DDR_DLL_BYPASS
	ori     a0, a0, 0x80
#endif
	sb      a0, OFFSET_DLL_GATE(t1)
	daddu	t1, t1, 0x20
	dsubu	t0, t0, 0x1
	b       dll_gate_set_loop
	nop
gate_sub_end:
#endif

#ifdef NO_EDGE_CHECK
#else
#if 1
/* unknown reason to reset init_start */
	dli     t1, 0x18
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	sd      a0, 0x0(t1)

	dli     t1, 0x18
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	ori     a0, a0, 0x1
	sd      a0, 0x0(t1)

1:
	dli     t1, 0x160
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00000000ff000000
	and     a0, a0, ta0
	beqz    a0, 1b
	nop
#endif

rd_oe_sub:
/*
 * Entry of the final edge-count check. It is re-entered from
 * glvl_resp_check_8 and rd_oe_8_sub whenever a slice had to be corrected.
 * t9 = (byte 4 of the dword at 0x168(t8) + 1) / 2, the value each slice's
 * sample difference is compared against. s6 = 1 is the value s7 is compared
 * with to decide on another pass.
 */

get_burst_length_half: //save in t9
	dli     t1, 0x168
	or      t1, t1, t8
	ld      t9, 0x0(t1)
	dli     ta0, 0x000000ff00000000
	and     t9, t9, ta0		// keep bits 39:32
	daddu   t9, t9, 0x0000000100000000	// + 1 in that field
	dsrl    t9, t9, 33 // shift the field down and divide by 2


	dli     s6, 0x1
glvl_req_set_last_0:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n last 0 req")
#endif
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffff00ff
	and     a0, a0, ta0
	ori     a0, a0, 0x100
	sd      a0, 0x0(t1)

	dli     a1, 0x1
glvl_done_sampling_last_0:
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00ff000000000000
	and     a0, a0, ta0
	dsrl    a0, a0, 48
	bne     a0, a1, glvl_done_sampling_last_0
	nop

glvl_resp_last_0:
	dli     s7, 0x0
	dli     t1, 0x180
	or      t1, t1, t8
	ld      s3, 0x0(t1) //save 0x180
	ld      s4, 0x8(t1) //save 0x188

glvl_req_set_last_1:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n last 1 req")
#endif
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffff00ff
	and     a0, a0, ta0
	ori     a0, a0, 0x100
	sd      a0, 0x0(t1)

	dli     a1, 0x1
glvl_done_sampling_last_1:
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00ff000000000000
	and     a0, a0, ta0
	dsrl    a0, a0, 48
	bne     a0, a1, glvl_done_sampling_last_1
	nop

glvl_resp_last_1:
	dli     t1, 0x180
	or      t1, t1, t8
	ld      t2, 0x0(t1) //lvl_resp 0
	ld      ta2, 0x8(t1) //lvl_resp 1-8

#if 1 // print the two sequence samples of leveling responds
#ifdef LVL_DEBUG
	move    ta2, s3
	dli     a0, 0x180
	and     a0, a0, 0xfff
	bal     hexserial
	nop
	PRINTSTR(":  ")
	dsrl    a0, ta2, 32
	bal     hexserial
	nop
	//PRINTSTR("  ")
	move    a0, ta2
	bal     hexserial
	nop
	PRINTSTR("\r\n")

	move    ta2, s4
	dli     a0, 0x188
	and     a0, a0, 0xfff
	bal     hexserial
	nop
	PRINTSTR(":  ")
	dsrl    a0, ta2, 32
	bal     hexserial
	nop
	//PRINTSTR("  ")
	move    a0, ta2
	bal     hexserial
	nop
	PRINTSTR("\r\n")

	dli     t1, 0x180
	or      t1, t1, t8
	ld      ta2, 0x0(t1) //lvl_resp 0
	move    a0, t1
	and     a0, a0, 0xfff
	bal     hexserial
	nop
	PRINTSTR(":  ")
	dsrl    a0, ta2, 32
	bal     hexserial
	nop
	//PRINTSTR("  ")
	move    a0, ta2
	bal     hexserial
	nop
	PRINTSTR("\r\n")

	dli     t1, 0x188
	or      t1, t1, t8
	ld      ta2, 0x0(t1) //lvl_resp 0
	move    a0, t1
	and     a0, a0, 0xfff
	bal     hexserial
	nop
	PRINTSTR(":  ")
	dsrl    a0, ta2, 32
	bal     hexserial
	nop
	//PRINTSTR("  ")
	move    a0, ta2
	bal     hexserial
	nop
	PRINTSTR("\r\n")
#endif
#endif

	dli     t1, 0x180
	or      t1, t1, t8
	ld      t2, 0x0(t1) //lvl_resp 0
	ld      ta2, 0x8(t1) //lvl_resp 1-8

#if 1 //debug
glvl_resp_check_0:
/*
 * Slice 0: compare two consecutive leveling samples.
 * Bits 60:58 of the dword at 0x180 are taken from the first sample (s3) and
 * from the second sample (t2). When the first value is >= 4, 8 is ORed into
 * the second one before subtracting (the 3-bit field is treated as having
 * wrapped). The difference must equal t9; otherwise rd_oe_0_sub corrects
 * the slice.
 */
	dli     ta0, 0x1c00000000000000
	and     t3, t2, ta0 //second sample
	and     ta1, s3, ta0 //first sample
	dsrl    t3, t3, 58
	dsrl    ta1, ta1, 58
	dli     ta0, 0x4
	bge     ta1, ta0, 1f //lvl_resp[4:2] ge 0x4
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_0_sub
	nop

glvl_resp_check_1:
	dli     ta0, 0x000000000000001c
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 2
	dsrl    ta1, ta1, 2
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_1_sub
	nop

glvl_resp_check_2:
	dli     ta0, 0x0000000000001c00
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 10
	dsrl    ta1, ta1, 10
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_2_sub
	nop

glvl_resp_check_3:
	dli     ta0, 0x00000000001c0000
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 18
	dsrl    ta1, ta1, 18
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_3_sub
	nop

glvl_resp_check_4:
	dli     ta0, 0x000000001c000000
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 26
	dsrl    ta1, ta1, 26
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_4_sub
	nop

glvl_resp_check_5:
	dli     ta0, 0x0000001c00000000
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 34
	dsrl    ta1, ta1, 34
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_5_sub
	nop

glvl_resp_check_6:
	dli     ta0, 0x00001c0000000000
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 42
	dsrl    ta1, ta1, 42
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_6_sub
	nop

glvl_resp_check_7:
	dli     ta0, 0x001c000000000000
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 50
	dsrl    ta1, ta1, 50
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_7_sub
	nop

glvl_resp_check_8:
/*
 * Slice 8 (ECC): same comparison as the other slices, on bits 60:58 of the
 * dword at 0x188 (first sample in s4, second in ta2). It is skipped when
 * bit 0 of the byte at 0x252(t8) is clear.
 * Afterwards, if any rd_oe_N_sub ran during this pass (s7 == s6 == 1) the
 * whole check restarts at rd_oe_sub; otherwise gate leveling is finished.
 */
/* identify whether there is an ecc slice */
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, 3f //when rd_after_write is enabled, the 9th device may not need leveling
	nop

	dli     ta0, 0x1c00000000000000
	and     t3, ta2, ta0 //second sample
	and     ta1, s4, ta0 //first sample
	dsrl    t3, t3, 58
	dsrl    ta1, ta1, 58
	dli     ta0, 0x4
	bge     ta1, ta0, 1f
	nop
	b       2f
	nop
1:
	ori     t3, t3, 0x8		// first value >= 4: treat the second as wrapped
2:
	dsubu   t3, t3, ta1
	bne     t3, t9, rd_oe_8_sub
	nop

3:
	beq     s7, s6, rd_oe_sub	// something was corrected: run another pass
	nop

	b       gate_leveling_exit
	nop

rd_oe_0_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_0 and rd_odt_0 sub")
#endif
	dli     t1, 0x028
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x030
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_1
	dli     s7, 0x1

rd_oe_1_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_1 and rd_odt_1 sub")
#endif
	dli     t1, 0x048
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x050
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_2
	dli     s7, 0x1

rd_oe_2_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_2 and rd_odt_2 sub")
#endif
	dli     t1, 0x068
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x070
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_3
	dli     s7, 0x1

rd_oe_3_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_3 and rd_odt_3 sub")
#endif
	dli     t1, 0x088
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x090
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_4
	dli     s7, 0x1

rd_oe_4_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_4 and rd_odt_4 sub")
#endif
	dli     t1, 0x0a8
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x0b0
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_5
	dli     s7, 0x1

rd_oe_5_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_5 and rd_odt_5 sub")
#endif
	dli     t1, 0x0c8
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x0d0
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_6
	dli     s7, 0x1

rd_oe_6_sub:
/*
 * Correction step for slice 6 (parameter dwords at 0x0e8 and 0x0f0).
 * NOTE(review): unlike every other rd_oe_N_sub, both write-backs below are
 * commented out, so the two subtractions have no effect and slice 6 is
 * never actually corrected. s7 is still set to 1, which makes
 * glvl_resp_check_8 start another pass; if slice 6 keeps mismatching, that
 * presumably repeats without end. Confirm whether disabling the stores is
 * an intended workaround or a debugging leftover.
 */
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_6 and rd_odt_6 sub")
#endif
	dli     t1, 0x0e8
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2		// bytes 6 and 7 minus 1 (result is discarded)
	//sd      a0, 0x0(t1)

	dli     t1, 0x0f0
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2		// bytes 2 and 3 minus 1 (result is discarded)
	//sd      a0, 0x0(t1)
	b       glvl_resp_check_7
	dli     s7, 0x1			// assumes .set noreorder so this runs in the delay slot - TODO confirm

rd_oe_7_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_7 and rd_odt_7 sub")
#endif
	dli     t1, 0x108
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x110
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       glvl_resp_check_8
	dli     s7, 0x1

rd_oe_8_sub:
#ifdef LVL_DEBUG
	PRINTSTR("\r\n rd_oe_8 and rd_odt_8 sub")
#endif
	dli     t1, 0x128
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0101000000000000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)

	dli     t1, 0x130
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     t2, 0x0000000001010000
	dsubu   a0, a0, t2
	sd      a0, 0x0(t1)
	b       rd_oe_sub
	dli     s7, 0x1

#endif //debug
#endif

gate_leveling_exit:
/*
 * End of gate leveling: clear byte 0 of the dword at 0x180(t8), then toggle
 * init_start (byte 0 of the dword at 0x18(t8)) from 0 to 1 and wait until
 * byte 3 of the dword at 0x160(t8) becomes non-zero.
 */
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	sd      a0, 0x0(t1)


//   dli      t1, 0x0000002020187803
//   sd       t1, 0xb8(t8)
/* unknown reason to reset init_start */
reset_init_start3:
	// init_start = 0
	dli     t1, 0x18
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	sd      a0, 0x0(t1)

	// init_start = 1
	dli     t1, 0x18
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	ori     a0, a0, 0x1
	sd      a0, 0x0(t1)

wait_init_done3:
	// busy-wait, no timeout: loops until bits 31:24 of 0x160(t8) are non-zero
	dli     t1, 0x160
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0x00000000ff000000
	and     a0, a0, ta0
	beqz    a0, wait_init_done3
	nop

#ifdef DDR_DLL_BYPASS //bypass dll_wrdqs, dll_wrdata and  dll_rddqs_p/n
	/*
	 * Convert four delay bytes of every slice to their bypass-mode value:
	 *   new = ((a1 * old) >> 7) | 0x80
	 * where a1 is bits 47:32 of the dword at 0x0(t8). t3 is the slice
	 * count (9 with the ECC slice, else 8); t1 starts at t8 | 0x38, so
	 * 0x1..0x4(t1) are slice offsets 0x19..0x1c.
	 */
	dli     t1, 0x0
	or      t1, t1, t8
	ld      a1, 0x0(t1)
	dli     ta0, 0x0000ffff00000000
	and     a1, a1, ta0
	dsrl    a1, a1, 32 // dll_value store in a1
//	daddu   a1, a1, 0x2

/* identify whether there is an ecc slice */
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, 1f //when rd_after_write is enabled, the 9th device may not need leveling
	nop
	dli     t3, 0x9 //loop times
	b       2f
	nop

1:
	dli     t3, 0x8 //loop times

2:

	dli     t1, 0x38
	or      t1, t1, t8
3:
	//set dll_wrdata
	lb      a0, 0x1(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x1(t1)

	//set dll_wrdqs
	lb      a0, 0x2(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x2(t1)

	//set dll_rddqs_p
	lb      a0, 0x3(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x3(t1)

	//set dll_rddqs_n
	lb      a0, 0x4(t1)
	dmulou  a0, a1, a0
	dsrl    a0, a0, 0x7
	ori     a0, a0, 0x80
	sb      a0, 0x4(t1)

	// next slice
	subu    t3, t3, 0x1
	daddu   t1, t1, 0x20
	bnez    t3, 3b
	nop

#endif

#ifdef PM_DPD_FRE
//when rd_oe_start/stop is set to 0x2, the rddqs_lt_half should be reversed
//because the rd_oe_start/stop only changed in this file, and all the rd_oe_start/stop change at the same time, here we only consider the rd_oe_start/stop of slice0
	lh      a0, 0x2c(t8)
	dli     ta0, 0x0202
	bne     ta0, a0, 3f
	nop

/* identify wheather there is ecc slice */
	li      t0, 0x8
	dli     t1, 0x250
	or      t1, t1, t8
	lb      a0, 0x2(t1)
	dli     t1, 0x1
	and     a0, a0, t1
	bne     a0, t1, 1f //when rd_after_write enabled, the 9th device may don't need leveling
	nop
	daddu   t0, t0, 0x1

1:

	dli     t1, 0x20
	or      t1, t1, t8

2:
	lb      a0, 0x2(t1)
	xori    a0, 0x1
	sb      a0, 0x2(t1)
	daddu   t1, t1, 0x20
	dsubu   t0, t0, 0x1
	bnez    t0, 2b
	nop

3:

#endif




100:
#if 0
test_memory:
	dli     t0, 0x9000000000000000
	GET_NODE_ID_a0
	or      t0, t0, a0
	dli     a0, 0x5555555555555555
	sd      a0, 0x0(t0)
	dli     a0, 0xaaaaaaaaaaaaaaaa
	sd      a0, 0x8(t0)
	dli     a0, 0x3333333333333333
	sd      a0, 0x10(t0)
	dli     a0, 0xcccccccccccccccc
	sd      a0, 0x18(t0)
	dli     a0, 0x7777777777777777
	sd      a0, 0x20(t0)
	dli     a0, 0x8888888888888888
	sd      a0, 0x28(t0)
	dli     a0, 0x1111111111111111
	sd      a0, 0x30(t0)
	dli     a0, 0xeeeeeeeeeeeeeeee
	sd      a0, 0x38(t0)

	dli     ta1, 0x9000000000000000
	GET_NODE_ID_a0
	or      ta1, ta1, a0
	ld      ta2, 0x30(ta1)
	dli     t2, 0x5555555555555555
	beq	ta2, t2, 2f
	nop
	ld	ta2, 0x20(ta1)
	beq	ta2, t2, 2f
	nop
	ld	ta2, 0x10(ta1)
	beq	ta2, t2, 2f
	nop
	ld	ta2, 0x00(ta1)
	beq	ta2, t2, 3f
	nop
	PRINTSTR("\r\nthe memory test failed!\r\n")
	b	4f
	nop

2:
	dli	t1, 0x1d0
	or	t1, t1, t8
	lb	a0, 0x4(t1)
	dsubu	a0,	a0, 0x1
	sb	a0, 0x4(t1)
	b	test_memory
	nop
3:
	PRINTSTR("the memory test sucess!\r\n")
	nop
4:
#endif
//set pm_dll_bypass
	dli     t1, 0x1
	sb      t1, 0x19(t8)
//remove dll_close_disable and dll_reync_disable
	dli     t1, 0x0
	sb      t1, 0x7(t8)

leveling_failed:
/*
 * Common exit of ddr3_leveling. It is the branch target when the preamble
 * check runs out of retries, and the success path also falls through into
 * it. Clears byte 0 of the dword at 0x180(t8), restores the return address
 * saved in s5 at entry and returns.
 */
	dli     t1, 0x180
	or      t1, t1, t8
	ld      a0, 0x0(t1)
	dli     ta0, 0xffffffffffffff00
	and     a0, a0, ta0
	sd      a0, 0x0(t1)

	move    ra, s5
	jr      ra
	nop
	.end    ddr3_leveling
/*
LEAF(hexserial4)
	move	a2, ra
	move	a1, a0
	li	a3, 0
1:
	rol	a0, a1, 4
	move	a1, a0
	and	a0, 0xf
	la	v0, hexchar
	addu	v0, s0
	addu	v0, a0
	bal	tgt_putchar
	lbu	a0, 0(v0)

	bnez	a3, 1b
	addu	a3, -1

	move	ra, a2
	j	ra
	nop
END(hexserial4)*/
